################################################################################
# CONGRESSIONAL INTERVENTION IN AGENCY ADJUDICATION
# AUTHORS: LINDSEY GAILMARD, DANIEL E. HO, MARK KRASS
################################################################################

################################################################################
# THIS FILE CREATES THE MATCHED PAIRS IN THE MAIN ANALYSIS.
################################################################################

# INSTALL PACKAGES

install.packages("svMisc")
install.packages("GenMatch")
install.packages("Matching")
install.packages("magick")
install.packages("cem")
install.packages("cobalt")
install.packages("mediation")

library(cobalt)
library(dplyr)
library(extrafont)
library(mediation)
require(magick)
require(tidyverse)
require(lubridate)
require(survival)
require(survminer)
require(GenMatch)
require(Matching)
require(MatchIt)
require(svMisc)
require(cem)


# SET LOCAL WORKING DIRECTORY
# The project root defaults to the original author's folder. To run the
# replication from another location without editing this file, set the
# BVA_PROJECT_DIR environment variable to the project root before sourcing.
# File paths used later in this script are relative to this directory.
default_project_dir <-
  "/Users/lindseygailmard/Dropbox/RegLab/BVA Congressional Intervention YLJ"
project_dir <- Sys.getenv("BVA_PROJECT_DIR", unset = default_project_dir)
if (!nzchar(project_dir)) {
  # An empty override is treated the same as no override.
  project_dir <- default_project_dir
}
if (!dir.exists(project_dir)) {
  stop(
    "Project directory not found: ", project_dir,
    " (set BVA_PROJECT_DIR to the project root)",
    call. = FALSE
  )
}
setwd(project_dir)

getwd()

################################################################################
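# MATCHING PROCEDURE:
# RESTRICT ATTENTION TO APPEALS WITHOUT BOARD DECISION AT THE INTERVENTION DATE
# EXACT MATCH ON: RO
#                 AGE AT NOD
#                 GENDER
#                 SERVICE PERIOD
#                 REPRESENTATIVE AT BVA
#                 QR ELIGIBILITY
#                 ACTION TYPE
#                 SUSRTYP
# RESTRICT CONTROLS TO NOD DATES WITHIN 30 DAYS OF THE TREATED APPEAL
# NEAREST-NEIGHBOR MATCH ON NOD DATE (MATCHIT, MAHALANOBIS DISTANCE)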
################################################################################

# LOAD THE APPEAL-LEVEL DATA (PATH IS RELATIVE TO THE WORKING DIRECTORY)
table1 <- read.csv("./Replication Files/Data/table1.csv", header = TRUE)

# INDICATOR: 1 WHEN bfddec IS NON-MISSING, 0 WHEN IT IS NA
table1$decided <- as.numeric(!is.na(table1$bfddec))

# KEEP ONLY THE COLUMNS USED BY THE MATCHING STEP, IN THIS ORDER
data_to_match <- dplyr::select(
  table1,
  # appeal identifiers, treatment flag, dates and outcome
  appeal_id, CI, bfregoff, noddate, aod, nodyear, decdate,
  appeal_duration, decided, first_corr, susrtyp,
  # ctyp_ps* indicator columns
  ctyp_ps0, ctyp_ps1, ctyp_ps2, ctyp_ps3, ctyp_ps4,
  ctyp_ps5, ctyp_ps6, ctyp_ps7, ctyp_ps8, ctyp_ps9,
  # representation indicators
  rep_unrepresented, rep_attorney, rep_servorg, missing_pos,
  # appellant-level columns
  sgender, dob, male, stermill, shomeless, sfinhard, sadvage,
  appellant_age_nod,
  # congressional-district columns
  congress, cd_match, medianincome, medianage, prcntunemp,
  prcntnotemploy, prcntblack, prcntwhite, prcnths, prcntba,
  # remaining appeal-level codes
  bfdc, bfac, qr
)


# RESTRICT TO APPEALS WITH NON-NEGATIVE APPEAL DURATION
# AND CI PRIOR TO FINAL BOARD DECISION
#
# The eligibility condition is evaluated once and reused for every vector
# below, so the vectors are guaranteed to be index-aligned: element i of each
# vector describes the same treated appeal.
#
# which() keeps only rows where the condition is TRUE. Indexing with the raw
# logical mask would also insert an NA placeholder for every row where the
# condition is NA (e.g. a missing date), leaving NA entries in all vectors.
#
# NOTE(review): first_corr and decdate are compared as read from the CSV;
# presumably they are ISO-formatted (YYYY-MM-DD) strings, for which string
# comparison is chronological -- TODO confirm.
treated_rows <- which(
  data_to_match$CI == 1 &
    !(data_to_match$appeal_duration < 0) &
    !(data_to_match$first_corr > data_to_match$decdate)
)

# PER-TREATED-APPEAL VECTORS USED BY THE MATCHING LOOP
appeals_treated <- as.vector(data_to_match$appeal_id[treated_rows])
first_corr <- as.Date(as.vector(data_to_match$first_corr[treated_rows]))
noddate <- as.Date(as.vector(data_to_match$noddate[treated_rows]))
qr <- as.vector(data_to_match$qr[treated_rows])
action <- as.vector(data_to_match$bfac[treated_rows])
type <- as.vector(data_to_match$susrtyp[treated_rows])

# CONCATENATE INTO ONE EXACT-MATCH KEY: REGOFF (bfregoff), AGE AT NOD,
# GENDER, ctyp_ps0-ctyp_ps9, missing_pos, AND THE REPRESENTATION INDICATORS.
# Two appeals share a stratum only if all of these values are identical.
data_to_match <- data_to_match %>%
  mutate(
    gender = as.factor(sgender),
    age = as.factor(appellant_age_nod),
    strata = factor(
      paste0(
        bfregoff,
        age,
        gender,
        ctyp_ps0,
        ctyp_ps1,
        ctyp_ps2,
        ctyp_ps3,
        ctyp_ps4,
        ctyp_ps5,
        ctyp_ps6,
        ctyp_ps7,
        ctyp_ps8,
        ctyp_ps9,
        missing_pos,
        rep_unrepresented,
        rep_attorney,
        rep_servorg
      )
    )
  )

# STRATUM OF EACH TREATED APPEAL (as.vector() turns the factor into character)
strata_treated <- as.vector(data_to_match$strata[treated_rows])

# BUILD THE MATCHED DATASET
#
# Inputs: data_to_match and the per-treated-appeal vectors appeals_treated,
# first_corr, noddate, qr, action, type and strata_treated (element i of each
# describes the same treated appeal).
# Output: matched_dataset, the row-bound match.data() output for every treated
# appeal whose matching succeeded, with an added adj_timeline column (whole
# days from the intervention date to decdate).
#
# Matched sets are collected in a pre-allocated list and bound once after the
# loop, instead of growing a data frame with rbind() on every iteration.
# Binding in one call also gives every argument of rbind() the same class;
# mixing an empty data.frame() with classed match.data() output can make
# rbind() fall back to its default method.
matched_sets <- vector("list", length(first_corr))

# LOOP OVER TREATED APPEALS
# seq_along() performs zero iterations when there are no treated appeals
# (1:length(x) would iterate over c(1, 0)).
for (i in seq_along(first_corr)) {
  print(i)

  intervention_date <- first_corr[i]
  treated_id <- appeals_treated[i]
  treated_type <- type[i]

  # RESTRICT TO APPEALS FILED WITHIN A MONTH OF TREATED CASE
  window_start <- noddate[i] - 30
  window_end <- noddate[i] + 30

  treated_qr <- qr[i]
  treated_action <- action[i]
  treated_strata <- strata_treated[i]

  # IDENTIFY SET OF POTENTIAL CONTROLS
  # Inside subset(), decdate, noddate and qr resolve to the columns of
  # data_to_match, not to the global vectors of the same name.
  # On error the result is NULL and the appeal is skipped; the previous
  # iteration's controls are never reused.
  controls <- tryCatch(
    subset(
      data_to_match,
      decdate >= intervention_date &
        noddate >= window_start &
        noddate <= window_end &
        susrtyp == treated_type &
        bfac == treated_action &
        qr == treated_qr &
        strata == treated_strata &
        CI == 0
    ),
    error = function(e) {
      message(
        "Appeal ", treated_id, ": could not build control pool: ",
        conditionMessage(e)
      )
      NULL
    }
  )
  if (is.null(controls) || nrow(controls) == 0L) {
    # No eligible control: nothing to match against.
    next
  }

  treated <- subset(data_to_match, appeal_id == treated_id)
  if (nrow(treated) == 0L) {
    next
  }

  combined <- rbind(treated, controls)

  # NEAREST-NEIGHBOR MATCH ON NOD DATE (MAHALANOBIS DISTANCE)
  # NOTE(review): noddate in `combined` is the column as read from the CSV
  # (only the separate noddate vector is converted with as.Date()); if it is
  # a character column, confirm matchit() treats it as intended.
  matched_set <- tryCatch(
    {
      m.out <- matchit(CI ~ noddate,
                       data = combined,
                       method = "nearest",
                       distance = "mahalanobis")

      m.data <- match.data(m.out)

      # Whole days from the intervention date to the decision date.
      m.data %>%
        mutate(adj_timeline = interval(intervention_date, decdate) %>%
                 time_length(unit = "days") %>%
                 trunc())
    },
    error = function(e) {
      message(
        "Appeal ", treated_id, ": matching failed: ",
        conditionMessage(e)
      )
      NULL
    }
  )

  # Assign only non-NULL results: `lst[[i]] <- NULL` would delete element i.
  if (!is.null(matched_set)) {
    matched_sets[[i]] <- matched_set
  }
}

# BIND ALL MATCHED SETS; EMPTY DATA FRAME WHEN NO APPEAL COULD BE MATCHED
matched_sets <- Filter(Negate(is.null), matched_sets)
matched_dataset <- if (length(matched_sets) > 0L) {
  do.call(rbind, matched_sets)
} else {
  data.frame()
}
